Observed log-chlorophyll at representative station for the St. Lucie Estuary

library(tidyverse)
library(lubridate)
library(mgcv)  
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')

# flow data, left moving window average of 30 days
data(sl_fldat)
fl_dat <- sl_fldat %>% 
  rename(date = Date) %>% 
  mutate(
    qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
  )

# format the data to model
data(sl_dat)
sl_mod <- sl_dat %>%
  rename(date = Date) %>% 
  mutate(
    doy = yday(date), 
    dec_time = decimal_date(date), 
    yr = year(date),
    yr = factor(yr),
    mo = month(date, label = T)
  ) %>% 
  left_join(fl_dat, by = 'date') %>% 
  mutate(
    flo = log(qsm),
    lnchl = log(1 + chl)
    ) %>% 
  select(-q, -qsm, -sal)

# plot, all
p <- ggplot(sl_mod, aes(x = date, y = lnchl)) + 
  geom_line() +
  theme_bw() 
ggplotly(p)
# boxplot, by years
p <- ggplot(sl_mod, aes(x = yr, y = lnchl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sl_mod, aes(x = mo, y = lnchl)) + 
  geom_boxplot() + 
  theme_bw()
ggplotly(p)

Some simple GAMs to explore annual, seasonal trends.

# smooths to evaluate
smths <- c(
  "s(dec_time, bs = 'tp')",  
  "s(doy, bs = 'cc')",
  "te(dec_time, doy, bs = c('tp', 'cc'))"
)

# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
  
 frm <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ') %>% 
        paste('lnchl ~ ', .) %>% 
        formula
    }) 
  
 frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sl_mod, 
    na.action = na.exclude
  )

})
names(mods) <- paste0('mod', seq_along(mods))

Summary stats of annual, seasonal models:

# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
mod1 s(dec_time) 1.491837 1.833950 8.0481357 0.0017935
mod2 s(doy) 4.209953 8.000000 6.5199494 0.0000000
mod3 te(dec_time,doy) 8.433081 11.101531 5.1741398 0.0000001
mod4 s(dec_time) 1.882782 2.353842 6.0328718 0.0014303
mod4 s(doy) 4.222344 8.000000 6.6335533 0.0000000
mod5 s(dec_time) 1.000000 1.000000 5.6591977 0.0179168
mod5 te(dec_time,doy) 9.559921 15.000000 3.3266003 0.0000000
mod6 s(doy) 4.206482 8.000000 5.8392276 0.0000000
mod6 te(dec_time,doy) 2.741101 3.810399 3.9993657 0.0043228
mod7 s(dec_time) 1.000000 1.000000 10.6360338 0.0012203
mod7 s(doy) 4.164553 8.000000 5.1577131 0.0000000
mod7 te(dec_time,doy) 2.852311 15.000000 0.2807338 0.1611397
# summary stats of GAMs
map(mods, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
mod1 654.5720 0.0350432
mod2 621.1283 0.1312536
mod3 619.4651 0.1457643
mod4 609.3679 0.1649803
mod5 619.0978 0.1517706
mod6 609.1895 0.1674090
mod7 608.6850 0.1711443
# prediction data
pred_dat <- crossing(
    yr = seq(floor(min(sl_mod$dec_time)), ceiling(max(sl_mod$dec_time))),
    doy = seq(1, 365, by = 5)
  ) %>%
  unite('date', yr, doy, sep = '-', remove = F) %>% 
  mutate(
    date = as.Date(date, format = '%Y-%j'),
    dec_time = decimal_date(date),
    mo = month(date, label = TRUE), 
    yr = year(date)
  ) %>% 
  left_join(., fl_dat[, c('date', 'qsm')]) %>% 
  mutate(flo = log(qsm)) %>% 
  select(-qsm)

# predictions
sl_res <- map(mods, function(x){
  pred_dat %>% 
    mutate(
      pred = predict(x, newdata = pred_dat)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sl_res, aes(x = date)) + 
  geom_point(data = sl_mod, aes(y = lnchl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)
# plot
p <- ggplot(sl_res, aes(x = doy, group = factor(yr), colour = yr)) + 
  geom_line(aes(y = pred)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    ) + 
  facet_wrap(~ mods, ncol = 2)
ggplotly(p)

Adding flow data to the model:

# smooths to evaluate
smths <- c(
  "s(dec_time, bs = 'tp')",  
  "s(doy, bs = 'cc')",
  "s(flo, bs = 'tp')",
  "te(flo, doy, bs = c('tp', 'cc'))", 
  "te(flo, dec_time, bs = c('tp', 'tp'))",
  "te(dec_time, doy, bs = c('tp', 'cc'))",
  "te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp'))"
)

# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
  
 frm <- combn(smths, i) %>%
    apply(2, function(x){
      paste(x, collapse = ' + ') %>% 
        paste('lnchl ~ ', .) %>% 
        formula
    }) 
  
 frms <- c(frms, frm)
 
}

# create models from smooth formula combinations
mods2 <- map(frms, function(frm){
  
  gam(frm, 
    knots = list(doy = c(1, 366)),
    data = sl_mod, 
    na.action = na.exclude
  )

})
names(mods2) <- paste0('mod', seq_along(mods2))

Summary stats of best year/season model, year/season/flow model

# best model with only season, year
best1 <- map(mods,  AIC) %>% 
  unlist %>% 
  which.min %>% 
  mods[[.]]

# best model with season, year, flow
best2 <- map(mods2, AIC) %>% 
  unlist %>% 
  which.min %>% 
  mods2[[.]] 

best <- list(best1 = best1, best2 = best2)

# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>% 
  enframe %>% 
  unnest %>% 
  kable
name smoother edf Ref.df F p.value
best1 s(dec_time) 1.0000001 1.000000 10.6360338 0.0012203
best1 s(doy) 4.1645525 8.000000 5.1577131 0.0000000
best1 te(dec_time,doy) 2.8523113 15.000000 0.2807338 0.1611397
best2 s(dec_time) 1.0000008 1.000001 1.9293923 0.1658109
best2 s(doy) 2.9402431 8.000000 0.7092063 0.0456631
best2 s(flo) 6.7839630 7.863753 2.9627043 0.0049629
best2 te(flo,doy) 2.9616367 12.000000 0.5845557 0.0124682
best2 te(flo,dec_time) 0.2050133 15.000000 0.0193067 0.0936091
best2 te(dec_time,doy) 0.0000023 15.000000 0.0000001 0.8125709
best2 te(dec_time,doy,flo) 19.8642287 48.000000 1.0091690 0.0000325
# summary stats of GAMs
map(best, ~ data.frame(
    AIC = AIC(.x), 
    R2 = summary(.x)$r.sq)) %>% 
  enframe %>% 
  unnest %>% 
  kable
name AIC R2
best1 608.6850 0.1711443
best2 556.8277 0.3351218
# predictions
sl_res2 <- map(best, function(x){
  pred_dat %>% 
    mutate(
      pred = predict(x, newdata = pred_dat)
    )
  }) %>% 
  enframe('mods') %>% 
  unnest

# plot
p <- ggplot(sl_res2, aes(x = date)) + 
  geom_point(data = sl_mod, aes(y = lnchl), size = 0.5) + 
  geom_line(aes(y = pred, colour = mods)) + 
  theme_bw() + 
  theme(
    legend.position = 'top', 
    legend.title = element_blank()
    )
ggplotly(p)
ptheme <- theme(
  axis.title.x = element_blank(), 
  axis.title.y = element_blank()
)
cols <- 'Spectral'
pb1 <- dynagam(best1, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  theme(legend.position = 'none') +
  ggtitle('Best 1')
pb2 <- dynagam(best2, pred_dat, ncol = 1, col_vec = cols) + 
  ptheme + 
  ggtitle('Best2')
pleg <- g_legend(pb2)
pb2 <- pb2 + 
  theme(legend.position = 'none')

grid.arrange(
  pleg, 
  arrangeGrob(pb1, pb2, ncol = 2, bottom = 'lnQ', left = 'lnchl'), 
  ncol = 1, 
  heights = c(0.1, 1)
)